Apple stocks time course


In [1]:
%pylab inline

from matplotlib import style
style.use('fivethirtyeight')


Populating the interactive namespace from numpy and matplotlib

In [2]:
# standard python packages
import requests
import pandas as pd

In [3]:
# machine learning packages
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures

Use this API for downloading daily stock prices. Read a basic example here


In [4]:
myquery = 'https://financialmodelingprep.com/api/v3/historical-price-full/aapl?serietype=line'

appl = requests.get(myquery) # APPLE stocks
type(appl)


Out[4]:
requests.models.Response

In [5]:
histprices = appl.json() # get all historical prices
# only the last 365 days
histprices = histprices['historical'][-365:] # last year
histprices[0]


Out[5]:
{u'close': 217.36, u'date': u'2018-10-15'}

In [6]:
# convert to DataFrame
aapl_df = pd.DataFrame.from_dict(histprices)

In [7]:
aapl_df.head(5)


Out[7]:
close date
0 217.36 2018-10-15
1 222.15 2018-10-16
2 221.19 2018-10-17
3 216.02 2018-10-18
4 219.31 2018-10-19

In [8]:
plt.figure(figsize=(10,4))
plt.plot(aapl_df['close'].values, color='black', lw = 2)#, aapl_df['close'].values)
plt.ylabel('Value'), plt.xlabel('Day')
plt.grid('on')
#histpricesdf.plot(x='date', y = 'close')



In [9]:
def polynomial_fit(x, y , degree):
    """
    Polynomial fit on the set of x,y values
    
    x (independent variable, column vector)
    y (dependent variable, row vector)
    degreee (int)
    
    Return the squared root of the mean squared error
    """
    

    features = PolynomialFeatures(degree)
    x_poly = features.fit_transform(x)
    
    model = LinearRegression()
    model.fit(x_poly, y)
    y_poly_pred = model.predict(x_poly)
    
    rmse = np.sqrt(mean_squared_error(y,y_poly_pred))
    r2 = r2_score(y,y_poly_pred)
    
    return (rmse, r2)

In [10]:
y = aapl_df['close'].values
x = np.arange(y.size)

# you want a COLUMN vector (many samples, 1 feature)
X = x.reshape(-1,1)

poly = list()
for i in range(2,10):
    rmse, r2 = polynomial_fit(X,y,i)
    myval = (i, rmse, r2)
    poly.append(r2)
    print('Polynomial degree %d, RMSE = %2.5f, R2 = %2.4f'%myval)
    
plt.figure(figsize=(6,4))
plt.plot(range(2,10), poly, 'ko-', color='brown');
plt.xticks(range(2,10));


Polynomial degree 2, RMSE = 21.21249, R2 = 0.7965
Polynomial degree 3, RMSE = 18.22071, R2 = 0.8499
Polynomial degree 4, RMSE = 17.75331, R2 = 0.8575
Polynomial degree 5, RMSE = 9.18511, R2 = 0.9618
Polynomial degree 6, RMSE = 8.84660, R2 = 0.9646
Polynomial degree 7, RMSE = 9.59979, R2 = 0.9583
Polynomial degree 8, RMSE = 12.52416, R2 = 0.9291
Polynomial degree 9, RMSE = 13.95951, R2 = 0.9119

In [11]:
# Polynomial transform
y = aapl_df['close'].values
x = np.arange(x.size)

# you want a COLUMN vector (many samples, 1 feature)
X = x.reshape(-1,1)

# you want a ROW vector (one sample, many features)
#X = X.reshape(1,1)

polynomial_features= PolynomialFeatures(degree=6)
x_poly = polynomial_features.fit_transform(X)

model = LinearRegression()
model.fit(x_poly, y)
y_poly_pred = model.predict(x_poly)

rmse = np.sqrt(mean_squared_error(y,y_poly_pred))
r2 = r2_score(y,y_poly_pred)
print('RMSE of polynomial degree i is %2.4f'%rmse)
print('R2 of polynomial regression is %2.4f'%r2)


RMSE of polynomial degree i is 8.8466
R2 of polynomial regression is 0.9646

In [12]:
plt.figure(figsize=(10,4))
plt.plot(x,y, color='black', lw =2 )
# sort the values of x before line plot
plt.plot(x, y_poly_pred, color='brown', lw =1.5 )
plt.ylabel('Value'), plt.xlabel('Day')
plt.grid('on')



In [ ]: